In [13]:
import networkx as nx
import igraph as ig
import numpy as np
from matplotlib import pyplot as plt
%matplotlib inline
In [14]:
graph = ig.Graph.Read_Ncol('../omim/disease.net.w.txt', weights=True, names=True)
graph = graph.as_undirected(mode='collapse')
In [15]:
g = ig.Graph()
g.add_vertices([0,1,2,3,4])
g.add_edges([(0,1), (1,2), (2,3), (2,4)])
nx.draw(nx.from_edgelist(g.get_edgelist()))
In [16]:
er = ig.Graph.Erdos_Renyi(100, 0.1)
nx.draw(nx.from_edgelist(er.get_edgelist()))
In [17]:
grg = ig.Graph.GRG(100, 0.2)
nx.draw(nx.from_edgelist(grg.get_edgelist()))
In [18]:
ba = ig.Graph.Barabasi(100)
nx.draw(nx.from_edgelist(ba.get_edgelist()))
In [19]:
# power-law degree distribution
from collections import Counter
ba_max = ig.Graph.Barabasi(100000)
cnt = Counter(ba_max.degree()).items()
deg_cnt = sorted(cnt)
degs, cnts = list(zip(*deg_cnt))
freq_cnts = np.array(cnts)/np.sum(cnts)
plt.loglog(degs, freq_cnts, basex=2);
In [20]:
graph = ig.Graph.Read_Ncol('../omim/disease.net.w.txt', weights=True, names=True)
graph = graph.as_undirected(mode='collapse')
In [21]:
print('Nodes: %d' % graph.vcount())
print('Edges: %d' % graph.ecount())
c = graph.components(mode='strong')
giant = c.giant()
print('Strong conn. comp: %d' % len(c.sizes()))
print('Giant component: %d' % giant.vcount())
print('Closed triangles: %d' % graph.as_directed().triad_census().t300)
print('Diameter: %d' % giant.diameter())
print('Clustering: %3.3f' % graph.transitivity_avglocal_undirected())
In [22]:
nx_graph = nx.from_edgelist(graph.get_edgelist())
spring_pos = nx.spring_layout(nx_graph)
nx.draw(nx_graph, pos=spring_pos, node_size=3, node_color='black', edge_color='black', width=1)
In [23]:
f = open('../omim/disease_names.txt')
did2name = dict([line.strip().split('\t') for line in f])
f.close()
graph.vs['disease'] = [did2name[did] for did in graph.vs['name']]
In [24]:
f = open('../omim/disease_classes.txt')
did2class = dict([line.strip().split('\t') for line in f])
f.close()
dclasses = set(did2class.values())
graph.vs['dclass'] = [did2class[did] for did in graph.vs['name']]
c = ['orchid', 'peru', 'pink', 'plum', 'powderblue', 'purple', 'red', 'rosybrown',
'royalblue', 'orange', 'salmon', 'silver', 'skyblue', 'slategray', 'springgreen',
'steelblue', 'tomato', 'turquoise', 'violet', 'wheat', 'white', 'yellow']
class2color = {dclass : c[i] for i, dclass in enumerate(dclasses)}
colors = [class2color[n['dclass']] for n in graph.vs]
print('Disease classes:\n %s' % '\n '.join('%12s %s' % (class2color[dclass], dclass) for dclass in dclasses))
nx_graph = nx.from_edgelist(graph.get_edgelist())
spring_pos = nx.spring_layout(nx_graph)
nx.draw(nx_graph, pos=spring_pos, node_size=30, node_color=colors, edge_color='black', width=1)
In [25]:
# Keyword arguments can be used to filter the vertices based on their attributes or their structural properties.
eye_nodes = graph.vs.select(dclass='Ophthamological')
print('Eye-related diseases: %d' % len(eye_nodes))
subgraph = graph.subgraph(eye_nodes)
labels = dict(enumerate(subgraph.vs['disease']))
nx_graph = nx.Graph()
nx_graph.add_nodes_from(labels.keys())
nx_graph.add_edges_from(subgraph.get_edgelist())
print('Nodes: %d Edges: %d' % (len(nx_graph.nodes()), len(nx_graph.edges())))
spring_pos = nx.spring_layout(nx_graph)
nx.draw(nx_graph, pos=spring_pos, labels=labels, with_labels=False)
In [26]:
# Finding a single node with some properties
leukemia = graph.vs.find(disease="Leukemia")
leukemia_nghbs = leukemia.neighbors()
print('Neighboring nodes:\n %s' % '\n '.join(nghb['disease'] for nghb in leukemia_nghbs))
leukemia_paths = np.squeeze(np.array(leukemia.shortest_paths()))
# Nodes that are close to the ``leukemia'' in terms of their shortest path distance
nearby_idx = np.nonzero(leukemia_paths < 2)[0]
print()
print('Nearby nodes:\n %s' % '\n '.join(graph.vs[i]['disease'] for i in nearby_idx[:10]))
# Nodes that are far away from the ``leukemia'' in terms of their shortest path distance
distant_idx = np.nonzero(np.isinf(leukemia_paths))[0]
print()
print('Distant nodes:\n %s' % '\n '.join(graph.vs[i]['disease'] for i in distant_idx[:10]))
In [27]:
deg_dis = sorted([(n.degree(), n['disease']) for n in graph.vs], reverse=True)
print('Max deg.:\n%s' % '\n'.join('%d %s' % (deg, dis) for deg, dis in deg_dis[:10]))
In [28]:
nbs = graph.betweenness()
bs_dis = sorted([(bs, n['disease']) for bs, n in zip(nbs, graph.vs)], reverse=True)
print('Betweenness:\n%s' % '\n'.join('%4.3f %s' % (bs, dis) for bs, dis in bs_dis[:10]))
In [29]:
bs_id = sorted([(bs, i) for i, bs in enumerate(nbs)], reverse=True)
ids = list(zip(*bs_id))[1]
subgraph = graph.subgraph(ids[:7])
labels = dict(enumerate(subgraph.vs['disease']))
nx_graph = nx.Graph()
nx_graph.add_nodes_from(labels.keys())
nx_graph.add_edges_from(subgraph.get_edgelist())
spring_pos = nx.spring_layout(nx_graph)
nx.draw(nx_graph, pos=spring_pos, labels=labels, with_labels=True)
In [30]:
prs = graph.pagerank()
pr_dis = sorted([(pr, i) for i, pr in enumerate(prs)], reverse=True)
print('Pagerank:\n%s' % '\n'.join('%4.3f %s' % (pr, graph.vs[i]['disease']) for pr, i in pr_dis[:10]))
In [31]:
top_k = 10
top_ids = [i for _, i in pr_dis[:top_k]]
nx_graph = nx.from_edgelist(graph.get_edgelist())
colors = ['red' if i in top_ids else 'black' for i, _ in enumerate(prs)]
node_size = [10 + 20 * (top_k - top_ids.index(i)) if i in top_ids else 10 for i, _ in enumerate(prs)]
spring_pos = nx.spring_layout(nx_graph)
nx.draw(nx_graph, pos=spring_pos, node_color=colors, edge_color='black', node_size=node_size, width=1)
In [32]:
c = graph.components(mode='strong')
giant = c.giant()
rndwlk = giant.community_walktrap(steps=3).as_clustering()
mmbr_rndwlk = rndwlk.membership
n_cmt = len(set(mmbr_rndwlk))
cnt = Counter(mmbr_rndwlk)
print('C: %5.2f S: %5.2f' % (n_cmt, sum(cnt.values()) / n_cmt))
# Modularity is one measure of the structure of networks or graphs.
# It was designed to measure the strength of division of a network
# into modules.
# Networks with high modularity have dense connections between the nodes
# within modules but sparse connections between nodes in different modules.
# Modularity of S is the difference between mS, the number of edges between nodes in S,
# and E(mS), the expected number of such edges in a random graph with identical degree
# sequence. The value of the modularity lies in the range [−1/2,1). It is positive if
# the number of edges within groups exceeds the number expected on the basis of chance.
print('Modularity (random walks): %3.3f' % rndwlk.modularity)
In [33]:
nx_graph = nx.Graph()
nx_graph.add_edges_from(giant.get_edgelist())
spring_pos = nx.fruchterman_reingold_layout(nx_graph)
cmt2col = {cmt: np.random.rand(3) for cmt in set(mmbr_rndwlk)}
colors = [cmt2col[mmbr_rndwlk[i]] for i in range(giant.vcount())]
nx.draw(nx_graph, pos=spring_pos, node_color=colors, node_size=20, linewidths=0)
plt.savefig('../omim/omim-walktrap.pdf')
In [34]:
c = graph.components(mode='strong')
giant = c.giant()
lblprp = giant.community_label_propagation()
mmbr_lblprp = lblprp.membership
n_cmt = len(set(mmbr_lblprp))
cnt = Counter(mmbr_lblprp)
print('C: %5.2f S: %5.2f' % (n_cmt, sum(cnt.values())/n_cmt))
print('Modularity (label propagation): %3.3f' % lblprp.modularity)
In [35]:
nx_graph = nx.Graph()
nx_graph.add_edges_from(giant.get_edgelist())
spring_pos = nx.fruchterman_reingold_layout(nx_graph)
cmt2col = {cmt: np.random.rand(3) for cmt in set(mmbr_lblprp)}
colors = [cmt2col[mmbr_lblprp[i]] for i in range(giant.vcount())]
nx.draw(nx_graph, pos=spring_pos, node_color=colors, node_size=20, linewidths=0)
plt.savefig('../omim/omim-labprop.pdf')
In [36]:
c = graph.components(mode='strong')
giant = c.giant()
infomap = giant.community_infomap()
mmbr_infomap = infomap.membership
n_cmt = len(set(mmbr_infomap))
cnt = Counter(mmbr_infomap)
print('C: %5.2f S: %5.2f' % (n_cmt, sum(cnt.values())/n_cmt))
print('Modularity (Infomap): %3.3f' % infomap.modularity)
print('Codelength (Infomap): %3.3f' % infomap.codelength)
In [37]:
nx_graph = nx.Graph()
nx_graph.add_edges_from(giant.get_edgelist())
spring_pos = nx.fruchterman_reingold_layout(nx_graph)
cmt2col = {cmt: np.random.rand(3) for cmt in set(mmbr_infomap)}
colors = [cmt2col[mmbr_infomap[i]] for i in range(giant.vcount())]
nx.draw(nx_graph, pos=spring_pos, node_color=colors, node_size=20, linewidths=0)
plt.savefig('../omim/omim-infomap.pdf')
In [38]:
# Write the OMIM network in a format for explorative data analysis in Gephi
# Labeled edgelist
graph.write_graphml('../omim/omim_gephi.graphml')
# Now go and explore the network with Gephi (http://gephi.github.io/users/download/)!